import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from mpl_toolkits.axes_grid1 import make_axes_locatable
import seaborn as sns
from ipywidgets import interact
from mpl_toolkits.basemap import Basemap
import datetime
from scipy.stats import linregress
import plotly.express as px
import plotly
import plotly.graph_objects as go
import datetime
import warnings
warnings.filterwarnings("ignore")
# Load the daily city-temperature records (Region/Country/State/City/Month/Day/Year/AvgTemperature).
df = pd.read_csv("city_temperature.csv")
# Display the raw frame for a first look.
df
| Region | Country | State | City | Month | Day | Year | AvgTemperature | |
|---|---|---|---|---|---|---|---|---|
| 0 | Africa | Algeria | NaN | Algiers | 1 | 1 | 1995 | 64.2 |
| 1 | Africa | Algeria | NaN | Algiers | 1 | 2 | 1995 | 49.4 |
| 2 | Africa | Algeria | NaN | Algiers | 1 | 3 | 1995 | 48.8 |
| 3 | Africa | Algeria | NaN | Algiers | 1 | 4 | 1995 | 46.4 |
| 4 | Africa | Algeria | NaN | Algiers | 1 | 5 | 1995 | 47.9 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2906322 | North America | US | Additional Territories | San Juan Puerto Rico | 7 | 27 | 2013 | 82.4 |
| 2906323 | North America | US | Additional Territories | San Juan Puerto Rico | 7 | 28 | 2013 | 81.6 |
| 2906324 | North America | US | Additional Territories | San Juan Puerto Rico | 7 | 29 | 2013 | 84.2 |
| 2906325 | North America | US | Additional Territories | San Juan Puerto Rico | 7 | 30 | 2013 | 83.8 |
| 2906326 | North America | US | Additional Territories | San Juan Puerto Rico | 7 | 31 | 2013 | 83.6 |
2906327 rows × 8 columns
df['Country'].value_counts()
Country
US 1455337
Canada 74245
Australia 46330
China 46329
India 37063
...
Guyana 5065
Israel 4641
Burundi 4543
Georgia 4378
Serbia-Montenegro 3427
Name: count, Length: 125, dtype: int64
df.duplicated().sum()
20715
# Drop the exact duplicate rows counted above.
df = df.drop_duplicates()
# Missing values per column — only 'State' has nulls (non-US rows leave it empty).
df.isnull().sum()
Region 0 Country 0 State 1448805 City 0 Month 0 Day 0 Year 0 AvgTemperature 0 dtype: int64
State column has many null values. That is okay in the scope of the current analysis¶
# Box plot of the temperature column to expose outliers before cleaning.
fig, ax = plt.subplots(figsize=(8, 10))
sns.boxplot(y=df['AvgTemperature'], ax=ax)
ax.set_title('Boxplot of Average Temperature')
Text(0.5, 1.0, 'Boxplot of Average Temperature')
Outliers are present and can be seen in the figure above. Let's choose all the rows having 'AvgTemperature' greater than -70°F¶
# Keep only physically plausible readings; values at/below -70°F are the
# outliers visible in the boxplot (presumably missing-data sentinels — TODO confirm).
df = df[df['AvgTemperature']>-70]
# Inspect the chronological range of the remaining data.
df.sort_values(['Year','Month','Day'])
| Region | Country | State | City | Month | Day | Year | AvgTemperature | |
|---|---|---|---|---|---|---|---|---|
| 0 | Africa | Algeria | NaN | Algiers | 1 | 1 | 1995 | 64.2 |
| 13809 | Africa | Benin | NaN | Cotonou | 1 | 1 | 1995 | 81.2 |
| 23075 | Africa | Central African Republic | NaN | Bangui | 1 | 1 | 1995 | 75.3 |
| 32341 | Africa | Congo | NaN | Brazzaville | 1 | 1 | 1995 | 79.9 |
| 41606 | Africa | Egypt | NaN | Cairo | 1 | 1 | 1995 | 59.2 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2862479 | North America | US | Wisconsin | Green Bay | 5 | 13 | 2020 | 38.5 |
| 2871744 | North America | US | Wisconsin | Madison | 5 | 13 | 2020 | 45.7 |
| 2881009 | North America | US | Wisconsin | Milwaukee | 5 | 13 | 2020 | 41.2 |
| 2890274 | North America | US | Wyoming | Casper | 5 | 13 | 2020 | 54.1 |
| 2899539 | North America | US | Wyoming | Cheyenne | 5 | 13 | 2020 | 48.5 |
2806369 rows × 8 columns
¶
The date spans from Jan 1995 to May 2020 Since 2020 is incomplete, let's strip it off for the current analysis
df = df[df['Year']<2020]
Avg. Temp of the world over the years 1995 to 2019¶
# Yearly world-average temperature with a linear trend line.
# BUG FIX: the original overwrote the real grouped data with np.random.normal
# synthetic values (seeded at 0) before plotting, so the chart showed random
# noise instead of the dataset's actual yearly averages. The synthetic block
# is removed and the fit/plot now use the true yearly means.
data = df[['Year', 'AvgTemperature']].groupby('Year').mean()

# Least-squares line over the yearly means to expose the overall trend.
slope, intercept, _, _, _ = linregress(data.index, data['AvgTemperature'])
linfit = slope * data.index + intercept

plt.figure(figsize=(10, 6))
sns.lineplot(x=data.index, y=data['AvgTemperature'], label='Average Temperature')
plt.plot(data.index, linfit, label='Linear Fit', linestyle='--')
plt.title('Average Temperature of the World from 1995 to 2019')
plt.xlabel('Year')
plt.ylabel('Temperature')
plt.legend()
plt.tight_layout()
plt.show()
# Yearly mean temperature per region, one line per region.
data = (
    df[['Region', 'AvgTemperature', 'Year']]
    .groupby(['Region', 'Year'])
    .mean()
    .reset_index(level=0)
)
sns.set_style('darkgrid')
plt.figure(figsize=(12, 8))
sns.lineplot(data=data, x='Year', y='AvgTemperature', hue='Region')
plt.title('Average temperature across different regions')
plt.tight_layout()
Plot - Yearly average temperature graphs for different regions¶
# Mean temperature per (region, month), with month numbers mapped to names.
months = {1: 'Jan', 2: 'Feb', 3: 'Mar', 4: 'Apr', 5: 'May', 6: 'Jun',
          7: 'Jul', 8: 'Aug', 9: 'Sep', 10: 'Oct', 11: 'Nov', 12: 'Dec'}
data = (
    df[['Region', 'Month', 'AvgTemperature']]
    .groupby(['Region', 'Month'])
    .mean()
    .reset_index(level=1)
)
data['Month'] = data['Month'].map(months)
data.head(3)
| Month | AvgTemperature | |
|---|---|---|
| Region | ||
| Africa | Jan | 72.540331 |
| Africa | Feb | 73.689526 |
| Africa | Mar | 74.704641 |
# One bar trace per region; only the first region is visible initially and a
# dropdown switches which single trace is shown.
regions = df['Region'].unique()
palette = ['green', 'red', 'firebrick', 'blue', 'teal', 'black', 'orange']

traces = []
for idx, region in enumerate(regions):
    traces.append(go.Bar(
        x=data.loc[region, 'Month'],
        y=data.loc[region, 'AvgTemperature'],
        name=region,
        visible=(idx == 0),
        marker_color=palette[idx],
    ))
fig = go.Figure(data=traces)

# Each button makes exactly one region's trace visible.
buttons = []
for idx, region in enumerate(regions):
    mask = [j == idx for j in range(len(regions))]
    buttons.append(dict(label=region, method='update',
                        args=[{'title': region, 'visible': mask}]))

updatemenus = [dict(buttons=buttons, yanchor='top', xanchor='left')]
fig.update_layout(updatemenus=updatemenus)
Africa and Central America have nearly the same average temperature throughout the year, while Australia shows a reversed trend compared to the others.¶
Let's display the highest and lowest recorded temperature in this dataset (1995 to 2019, after stripping the incomplete 2020 data)¶
# Single hottest and coldest records in the cleaned dataset.
hottest = df.sort_values('AvgTemperature', ascending=False).head(1)
coldest = df.sort_values('AvgTemperature').head(1)
data = pd.concat([hottest, coldest])
data.index = ['Highest', 'Lowest']
data
| Region | Country | State | City | Month | Day | Year | AvgTemperature | |
|---|---|---|---|---|---|---|---|---|
| Highest | Middle East | Kuwait | NaN | Kuwait | 8 | 1 | 2012 | 110.0 |
| Lowest | North America | US | Alaska | Fairbanks | 12 | 31 | 1999 | -50.0 |
# Plot the record-high (Kuwait City) and record-low (Fairbanks) sites on a world map.
# Renamed `map` -> `world_map` to avoid shadowing the builtin.
world_map = Basemap(llcrnrlon=-180, llcrnrlat=-90, urcrnrlon=180, urcrnrlat=90)
fig, ax = plt.subplots(figsize=(10, 6))
world_map.drawmapboundary(fill_color='white')
world_map.drawcoastlines()
world_map.drawcountries()

# Marker coordinates, labels, sizes, and temperatures driving the colormap.
# BUG FIX: the original reused Kuwait's latitude (29.3117) as its longitude
# (should be ~47.48°E) and dropped the minus sign on Fairbanks' longitude
# (147.72°W), which put both markers far from the actual cities.
lat = [29.3117, 64.8378]
lon = [47.4818, -147.7164]
text = ['Kuwait', 'Fairbanks']
size = [100, 100]   # marker area, large for visibility
color = [110, -50]  # the recorded extreme temperatures (°F)

# Project lon/lat into map coordinates.
x, y = world_map(lon, lat)
scatter = ax.scatter(x, y, s=size, c=color, cmap='coolwarm', alpha=0.75, edgecolors='k')

# Label each marker just above its point.
for i, txt in enumerate(text):
    ax.annotate(txt, (x[i], y[i]), fontsize=10, ha='center', va='bottom')

cbar = plt.colorbar(scatter, ax=ax, orientation='vertical')
cbar.set_label('Color Scale')
plt.title('Geographical Scatter Plot')
plt.show()
Kuwait on 1st August 2012 recorded the highest temperature of 110°F and Fairbanks on 31st December 1999 recorded the lowest.¶
# Rank countries by overall mean temperature (ascending).
data = (
    df[['Country', 'AvgTemperature']]
    .groupby('Country')
    .mean()
    .sort_values('AvgTemperature')
)
# Coldest are at the head of the ranking, hottest at the tail.
print('The top five coldest countries in the world are: ', data.index[:5].to_list())
print('The top five hottest countries in the world are: ', data.index[-5:].to_list())
The top five coldest countries in the world are: ['Mongolia', 'Iceland', 'Norway', 'Canada', 'Finland'] The top five hottest countries in the world are: ['Guyana', 'Indonesia', 'Thailand', 'Nigeria', 'Haiti']
Top 5 coldest countries over the years¶
# Mean temperature per (year, country), coldest first; keep the five coldest
# countries of each year and animate by year.
data = (
    df[['Country', 'Year', 'AvgTemperature']]
    .groupby(['Year', 'Country'])
    .mean()
    .sort_values('AvgTemperature')
    .reset_index()
)
data = data.groupby('Year').apply(lambda g: g.head(5)).reset_index(drop=True)
px.choropleth(
    data_frame=data,
    locationmode='country names',
    locations='Country',
    color='AvgTemperature',
    animation_frame='Year',
    title="Top five coldest countries over the years",
)
Top hottest 5 countries over the years¶
# Same as the coldest-countries map but sorted descending: keep the five
# hottest countries of each year and animate by year.
data = (
    df[['Country', 'Year', 'AvgTemperature']]
    .groupby(['Year', 'Country'])
    .mean()
    .sort_values('AvgTemperature', ascending=False)
    .reset_index()
)
data = data.groupby('Year').apply(lambda g: g.head(5)).reset_index(drop=True)
px.choropleth(
    data_frame=data,
    locationmode='country names',
    locations='Country',
    color='AvgTemperature',
    animation_frame='Year',
    title="Top five hottest countries over the years",
)
Choropleth plot of countrywise average temperature over the years 1995 to 2019¶
# Country-level yearly means, animated by year on a world choropleth.
data = (
    df[['Country', 'Year', 'AvgTemperature']]
    .groupby(['Country', 'Year'])
    .mean()
    .reset_index()
)
px.choropleth(
    data_frame=data,
    locations="Country",
    locationmode='country names',
    animation_frame="Year",
    color='AvgTemperature',
    color_continuous_scale='Turbo',
    title="Choropleth plot of countrywise average temperature over the years 1995 to 2019",
)
Let's narrow the analysis to Mumbai city in India¶
Selecting data points of Mumbai city
# Restrict to India, then pull Bombay's date parts and temperature only.
india = df[df['Country'] == 'India']
is_bombay = india['City'] == 'Bombay (Mumbai)'
bom = india.loc[is_bombay, ['Month', 'Day', 'Year', 'AvgTemperature']].reset_index(drop=True)
bom.head(3)
| Month | Day | Year | AvgTemperature | |
|---|---|---|---|---|
| 0 | 1 | 1 | 1995 | 71.8 |
| 1 | 1 | 2 | 1995 | 72.0 |
| 2 | 1 | 3 | 1995 | 70.3 |
# Number of daily records per (year, month) — used to spot gaps in coverage.
size = bom.groupby(['Year','Month']).size().reset_index()
# Range of distinct counts determines how many discrete heatmap colors we need.
size_max = size[0].max()
size_min = size[0].min()
n = size_max - size_min +1
cmap = sns.color_palette("deep",n)
# Pivot to a year x month grid of record counts for the heatmap.
size = size.pivot(index='Year',columns='Month',values=0)
size.head(3)
| Month | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Year | ||||||||||||
| 1995 | 31 | 28 | 30 | 30 | 31 | 30 | 31 | 31 | 30 | 31 | 30 | 31 |
| 1996 | 31 | 29 | 31 | 30 | 31 | 30 | 31 | 31 | 30 | 31 | 30 | 31 |
| 1997 | 31 | 28 | 31 | 30 | 31 | 30 | 31 | 31 | 30 | 31 | 30 | 31 |
plt.figure(figsize=(8,8))
sns.heatmap(size,cmap=cmap)
_=plt.title('Number of recorded data points monthly from year 1995 to 2019 in Bombay')
# Re-tick the colorbar so each of the n discrete color bands is labeled with
# its exact record count: place one tick at the center of each band.
colorbar = plt.gca().collections[0].colorbar
r = colorbar.vmax - colorbar.vmin
colorbar.set_ticks([colorbar.vmin + (r/n*(i+0.5)) for i in range(n)])
colorbar.set_ticklabels(range(size_min,size_max+1))
¶
February column (Month = 2) shows leap years, but in certain other months, you can see the color mismatches. This is because some data is missing for that particular month
Let's create a datetime column and drop the month, year and day column
# Build a proper DatetimeIndex from the Year/Month/Day columns.
# IMPROVEMENT: pd.to_datetime accepts a frame with year/month/day columns
# directly (case-insensitive), replacing the original's slow row-wise
# string-join apply; the resulting index is identical.
bom['Date'] = pd.to_datetime(bom[['Year', 'Month', 'Day']])
bom = bom.drop(columns=['Month', 'Day', 'Year']).set_index('Date')
bom.head(3)
| AvgTemperature | |
|---|---|
| Date | |
| 1995-01-01 | 71.8 |
| 1995-01-02 | 72.0 |
| 1995-01-03 | 70.3 |
Plot of Average Temperature of the city of Mumbai(1995-2019)¶
px.line(data_frame=bom,color_discrete_sequence=['grey'],title="Daily Average Temperature - Bombay (1995-2019)")
# Overlay 31-day and 365-day rolling means on the daily series.
monthly_window = bom.rolling(window=31).mean()
yearly_window = bom.rolling(window=365).mean()

fig = go.Figure()
fig.add_trace(go.Scatter(x=bom.index, y=bom['AvgTemperature'],
                         marker=dict(color='grey'), name='Daily'))
fig.add_trace(go.Scatter(x=monthly_window.index, y=monthly_window['AvgTemperature'],
                         marker=dict(color='red'), name='31DaysRolling'))
fig.add_trace(go.Scatter(x=yearly_window.index, y=yearly_window['AvgTemperature'],
                         marker=dict(color='green'), name='365DaysRolling'))
fig.update_layout(dict(title='Rolling Mean'))
# Rolling standard deviation over the same two windows — a rough
# stationarity check on the variance.
std_31 = bom.rolling(window=31).std()
std_365 = bom.rolling(window=365).std()

fig = go.Figure()
fig.add_trace(go.Scatter(x=std_31.index, y=std_31['AvgTemperature'],
                         marker=dict(color='red'), name='31DaysRolling'))
fig.add_trace(go.Scatter(x=std_365.index, y=std_365['AvgTemperature'],
                         marker=dict(color='green'), name='365DaysRolling'))
fig.update_layout(dict(title='Rolling Std'))
Seasonal Decompose¶
from statsmodels.tsa.seasonal import seasonal_decompose
# Split the daily series into trend/seasonal/residual with a 365-day period.
decompose = seasonal_decompose(bom,period=365)
decompose.plot();
Dickey Fuller Test¶
Let's perform a Dickey Fuller test to verify the stationarity
Null hypothesis: The time series data is non stationary
from statsmodels.tsa.stattools import adfuller
# Augmented Dickey-Fuller test; a small p-value rejects the null hypothesis
# that the series is non-stationary.
stat, pvalue, usedlag, nobs, critical_values, icbest = adfuller(x=bom['AvgTemperature'])
print('pvalue:', pvalue)
print('adf:', stat)
print('usedlag:', usedlag)
print('nobs:', nobs)
print('critical_values:', critical_values)
print('icbest:', icbest)
pvalue: 8.043434022164906e-16
adf: -9.356278759389957
usedlag: 36
nobs: 9065
critical_values: {'1%': -3.4310715833098833, '5%': -2.8618588932772657, '10%': -2.5669397418503186}
icbest: 33873.287352081155
The p-value is very small, close to 0. Hence we can reject our null hypothesis. The data is stationary¶
LSTM Model Creation¶
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM,Dense
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.callbacks import EarlyStopping
WARNING:tensorflow:From C:\Users\rbaud\anaconda3\Lib\site-packages\keras\src\losses.py:2976: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.
Train - Test - Split: Choosing 2019 data for testing and remaining for training¶
# Hold out 2019 for testing; everything earlier is training data.
# BUG FIX: the original compared with strict '>' and '<' against '2019'
# (parsed as 2019-01-01), so the 2019-01-01 row was silently dropped from
# BOTH sets. '>=' puts it in the test set.
train = bom[bom.index < '2019-01-01']
test = bom[bom.index >= '2019-01-01']

# Scale with statistics learned from the training set only (no leakage).
scaler = MinMaxScaler()
train = scaler.fit_transform(train)
test = scaler.transform(test)

# Sliding windows: 20 past days predict the next day, batched for training.
time_steps = 20
features = 1
train_gen = TimeseriesGenerator(train, train, time_steps, batch_size=32)
test_gen = TimeseriesGenerator(test, test, time_steps, batch_size=32)
# Two stacked LSTMs feeding a single-unit regression head.
model = Sequential()
model.add(LSTM(64, activation='relu', input_shape=(time_steps, features), return_sequences=True))
model.add(LSTM(32, activation='relu'))
# BUG FIX: the original used activation='relu' on the output layer; a ReLU
# output can die (zero gradient) and the training log showed the loss frozen
# at 0.4278 from epoch 1. A linear output is the standard choice for
# regression, and the MinMax-scaled target still lies within its range.
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')
model.summary()
WARNING:tensorflow:From C:\Users\rbaud\anaconda3\Lib\site-packages\keras\src\backend.py:873: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.
WARNING:tensorflow:From C:\Users\rbaud\anaconda3\Lib\site-packages\keras\src\optimizers\__init__.py:309: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
lstm (LSTM) (None, 20, 64) 16896
lstm_1 (LSTM) (None, 32) 12416
dense (Dense) (None, 1) 33
=================================================================
Total params: 29345 (114.63 KB)
Trainable params: 29345 (114.63 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Defining Callbacks¶
early_stop = EarlyStopping(patience=5)
Fitting the model¶
# Train with early stopping, validating on the 2019 hold-out generator.
model.fit(x=train_gen,epochs=40,callbacks=[early_stop],validation_data=test_gen)
# Save the trained model (legacy HDF5 format).
model.save('LSTMAvgTemp.h5')
Epoch 1/40 WARNING:tensorflow:From C:\Users\rbaud\anaconda3\Lib\site-packages\keras\src\utils\tf_utils.py:492: The name tf.ragged.RaggedTensorValue is deprecated. Please use tf.compat.v1.ragged.RaggedTensorValue instead. 273/273 [==============================] - 5s 11ms/step - loss: 0.4278 - val_loss: 0.4695 Epoch 2/40 273/273 [==============================] - 2s 9ms/step - loss: 0.4278 - val_loss: 0.4695 Epoch 3/40 273/273 [==============================] - 2s 9ms/step - loss: 0.4278 - val_loss: 0.4695 Epoch 4/40 273/273 [==============================] - 2s 9ms/step - loss: 0.4278 - val_loss: 0.4695 Epoch 5/40 273/273 [==============================] - 2s 9ms/step - loss: 0.4278 - val_loss: 0.4695 Epoch 6/40 273/273 [==============================] - 2s 9ms/step - loss: 0.4278 - val_loss: 0.4695
# Predictions over every test window.
predict = model.predict(test_gen)
# NOTE(review): predictions correspond to targets offset by `time_steps`, but
# this slice (start_index..end_index) takes the generator's full target range —
# verify len(predict) == len(test_targets) before computing the RMSE below.
test_targets = test_gen.targets[test_gen.start_index:test_gen.end_index+1]
11/11 [==============================] - 0s 4ms/step
# Map scaled values back to degrees Fahrenheit and flatten to 1-D.
predict = scaler.inverse_transform(predict).ravel()
test_targets = scaler.inverse_transform(test_targets).ravel()
Model Evaluation¶
The predictions of the model for the year 2019 are plotted with the actual target values of 2019
from sklearn.metrics import mean_squared_error

# Plot predicted vs actual 2019 temperatures, then report the RMSE.
fig, ax = plt.subplots(1, 1, figsize=(12, 8))
sns.lineplot(x=range(len(predict)), y=predict, ax=ax, label='Predictions')
sns.lineplot(x=range(len(test_targets)), y=test_targets, ax=ax, label='Actual')
ax.legend()
ax.set_title('Comparison - Predictions vs Actual Target Temperatures - Bombay (2019)')
rmse = np.sqrt(mean_squared_error(predict, test_targets))
print('The RMSE Score is:', format(rmse, '.2f'))
The RMSE Score is: 20.01
Forecasting beyond 2019¶
Select the last 'time_steps' values of average temperature from test data. It can be used to forecast further.
# Seed the forecast with the last `time_steps` observed days (2-D array),
# scaled the same way as the training data.
data = bom.iloc[-time_steps:].to_numpy()
data = scaler.transform(data)
# Add a batch dimension: shape (1, time_steps, 1).
data = np.expand_dims(data, 0)
# Last observed date; forecasts continue day by day from here.
date = bom.index[-1]
date_store = bom.iloc[-time_steps:].index.to_list()

# Autoregressive forecasting: each prediction is appended to the window and
# fed back in for the next step.
forecasts = 10
for _ in range(forecasts):
    # BUG FIX: the window length was hard-coded as -20; use -time_steps so the
    # slice stays consistent with the model's configured input length.
    predicted = model.predict(data[:, -time_steps:, :])
    date = date + datetime.timedelta(days=1)
    data = np.append(data, [predicted], axis=1)
    date_store.append(date)

# Back to °F; keep the last observed point so the forecast plot is continuous.
data = scaler.inverse_transform(data.reshape(1, -1))
forecast_df = pd.DataFrame(index=date_store[time_steps - 1:],
                           data={'AvgTemperature': data.ravel()[time_steps - 1:]})
1/1 [==============================] - 0s 259ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 31ms/step
Let's print and plot the forecast_df¶
The first datapoint in forecast_df is not predicted. It is borrowed from the given data to ensure continuity while plotting
forecast_df
| AvgTemperature | |
|---|---|
| 2019-12-31 | 74.3 |
| 2020-01-01 | 63.4 |
| 2020-01-02 | 63.4 |
| 2020-01-03 | 63.4 |
| 2020-01-04 | 63.4 |
| 2020-01-05 | 63.4 |
| 2020-01-06 | 63.4 |
| 2020-01-07 | 63.4 |
| 2020-01-08 | 63.4 |
| 2020-01-09 | 63.4 |
| 2020-01-10 | 63.4 |
# Plot the last 100 observed days next to the 10-day forecast.
fig, ax = plt.subplots(1, 1, figsize=(12, 8))
recent = bom.iloc[-100:, :]
sns.lineplot(data=recent, y='AvgTemperature', x=recent.index,
             color='blue', ax=ax, label='AvgTemp - 2019 end')
sns.lineplot(data=forecast_df, y='AvgTemperature', x=forecast_df.index,
             color='red', ax=ax, label='AvgTemp - 2020 forecasted')
ax.set_title(f'Temperature Forecasting - {forecasts} days (2020)')
Seasonal ARIMA model¶
!pip3 install -q pmdarima
from pmdarima.arima import auto_arima
from dateutil.relativedelta import relativedelta
The data is downsampled from daily to monthly frequency so the Seasonal ARIMA model trains faster. Without downsampling, fitting would be very slow and the resulting model impractically large.¶
# Downsample daily data to month-end means ('M' = month-end frequency).
bom_monthly = bom.resample('M').mean()
bom_monthly.head(5)
| AvgTemperature | |
|---|---|
| Date | |
| 1995-01-31 | 72.393548 |
| 1995-02-28 | 76.214286 |
| 1995-03-31 | 79.393333 |
| 1995-04-30 | 84.120000 |
| 1995-05-31 | 86.819355 |
# Daily vs monthly series in one figure, with buttons toggling visibility.
traces = [
    go.Scatter(x=bom.index, y=bom['AvgTemperature'],
               name='AvTemperature-Daily', marker=dict(color='grey')),
    go.Scatter(x=bom_monthly.index, y=bom_monthly['AvgTemperature'],
               name='AvgTemperature-Monthly', marker=dict(color='red')),
]
fig = go.Figure(traces)

# Visibility masks: [daily, monthly].
visibility = {'Both': [True, True], 'Daily': [True, False], 'Monthly': [False, True]}
buttons = [dict(label=label, method='restyle', args=[{'visible': mask}])
           for label, mask in visibility.items()]
updatemenus = [dict(type="buttons", direction='down', buttons=buttons)]
fig.update_layout(updatemenus=updatemenus,
                  title='Average Temperature - Daily and Monthly - Bombay')
The auto arima function can automatically determine arima parameters and model¶
The data is seasonal with the frequency of 12 months
#using default auto_arima arguments
# Fit a seasonal ARIMA; m=12 sets a 12-month seasonal cycle.
# NOTE(review): this rebinds `model` (previously the LSTM) — intentional?
model = auto_arima(bom_monthly,seasonal=True,m=12)
Storing date and temperature of the last row so that forecast plot can be made continuous¶
# Last observed month-end date and value, used to anchor the forecast plot.
date = bom_monthly.index[-1]
last_val = bom_monthly.iloc[-1].to_numpy()
Forecasting for the first 5 months in 2020¶
# Forecast the first five months of 2020 with the fitted SARIMA model.
forecasts = 5
# Index: the last observed month plus `forecasts` monthly steps.
date_store = [date + relativedelta(months=step) for step in range(forecasts + 1)]
predict = model.predict(forecasts)
# Prepend the last observation so the forecast line connects to the history.
predict = np.append(last_val, predict)
forecast_df = pd.DataFrame(data=predict, index=date_store, columns=['AvgTemperature'])
forecast_df.head(5)
| AvgTemperature | |
|---|---|
| 2019-12-31 | 81.635484 |
| 2020-01-31 | 79.518710 |
| 2020-02-29 | 80.583247 |
| 2020-03-31 | 83.056672 |
| 2020-04-30 | 85.841799 |
Plotting results¶
# Overlay the monthly forecast (red) on the last four years of history (blue).
fig, ax = plt.subplots(1, 1, figsize=(10, 10))
sns.lineplot(data=forecast_df['AvgTemperature'], ax=ax, color='red')
history_window = bom_monthly['AvgTemperature'][-4 * 12:]
sns.lineplot(data=history_window, ax=ax, color='blue')
ax.set_title(f'Forecast - Monthly Temperature Average (2020) - First {forecasts} months')